library(tidyverse)
## ── Attaching packages ─────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
#setwd("~/Google Drive/_OneDrive_Atimi_Software/Upgrad/_Upgrad/Capstone_project/")
# Load the hospital general-information extract. Blank cells and the literal
# "Not Available" are read as NA; strings are read as factors so that str()
# and summary() report level counts. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
general_info <- read.csv(
  file = "ValidFiles/Hospital General Information.csv",
  header = TRUE,
  check.names = TRUE,
  na.strings = c("Not Available", ""),
  stringsAsFactors = TRUE
)
str(general_info)
## 'data.frame': 4818 obs. of 28 variables:
## $ Provider.ID : int 10001 10005 10006 10007 10008 10011 10012 10016 10018 10019 ...
## $ Hospital.Name : Factor w/ 4617 levels "ABBEVILLE AREA MEDICAL CENTER",..: 3685 2227 1107 2584 913 3952 983 3592 506 1568 ...
## $ Address : Factor w/ 4789 levels " CALLE CONCEPCION VERA AYALA #550 S",..: 468 2113 1727 3917 239 3219 1589 138 1359 810 ...
## $ City : Factor w/ 2949 levels "ABBEVILLE","ABERDEEN",..: 699 263 875 1931 1527 236 903 20 236 2413 ...
## $ State : Factor w/ 56 levels "AK","AL","AR",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ ZIP.Code : int 36301 35957 35631 36467 36049 35235 35968 35007 35233 35660 ...
## $ County.Name : Factor w/ 1565 levels "ABBEVILLE","ACADIA",..: 668 879 803 350 360 711 394 1300 711 317 ...
## $ Phone.Number : num 3.35e+09 2.57e+09 2.57e+09 3.34e+09 3.34e+09 ...
## $ Hospital.Type : Factor w/ 3 levels "Acute Care Hospitals",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Hospital.Ownership : Factor w/ 10 levels "Government - Federal",..: 2 2 2 10 6 10 6 10 10 2 ...
## $ Emergency.Services : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Meets.criteria.for.meaningful.use.of.EHRs : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ Hospital.overall.rating : int 3 3 2 3 3 2 3 3 NA 2 ...
## $ Hospital.overall.rating.footnote : Factor w/ 5 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA 4 NA ...
## $ Mortality.national.comparison : Factor w/ 3 levels "Above the National average",..: 3 2 2 3 3 3 2 3 NA 2 ...
## $ Mortality.national.comparison.footnote : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA 3 NA ...
## $ Safety.of.care.national.comparison : Factor w/ 3 levels "Above the National average",..: 1 3 3 3 NA 2 3 1 NA 2 ...
## $ Safety.of.care.national.comparison.footnote : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA 3 NA NA NA 3 NA ...
## $ Readmission.national.comparison : Factor w/ 3 levels "Above the National average",..: 3 1 3 2 3 3 3 1 NA 2 ...
## $ Readmission.national.comparison.footnote : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA 3 NA ...
## $ Patient.experience.national.comparison : Factor w/ 3 levels "Above the National average",..: 2 3 2 3 NA 2 3 2 NA 2 ...
## $ Patient.experience.national.comparison.footnote : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA 3 NA NA NA 3 NA ...
## $ Effectiveness.of.care.national.comparison : Factor w/ 3 levels "Above the National average",..: 3 3 3 3 3 2 3 1 2 3 ...
## $ Effectiveness.of.care.national.comparison.footnote : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA NA NA ...
## $ Timeliness.of.care.national.comparison : Factor w/ 3 levels "Above the National average",..: 3 1 1 1 1 3 1 2 3 1 ...
## $ Timeliness.of.care.national.comparison.footnote : Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA NA NA NA NA NA NA NA ...
## $ Efficient.use.of.medical.imaging.national.comparison : Factor w/ 3 levels "Above the National average",..: 3 2 3 NA NA 3 3 2 NA 2 ...
## $ Efficient.use.of.medical.imaging.national.comparison.footnote: Factor w/ 3 levels "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Qual"| __truncated__,..: NA NA NA 3 3 NA NA NA 3 NA ...
# Address / contact columns to remove from the general-information table.
zdemographics <- c("Hospital.Name", "Address", "City", "State", "County.Name", "Phone.Number", "ZIP.Code")
zdemogrphic_vars <- which(names(general_info) %in% zdemographics)
# Hospital descriptor columns that are removed as well.
zvar1 <- c("Hospital.Type", "Hospital.Ownership", "Emergency.Services", "Meets.criteria.for.meaningful.use.of.EHRs")
zvar2 <- which(names(general_info) %in% zvar1)
# Drop the columns with a logical mask rather than a negated which():
# x[, -integer(0)] selects zero columns, so the negated-index form would
# silently empty the table if none of the names were present.
general_info_cleaned <- general_info[, !(names(general_info) %in% c(zdemographics, zvar1)), drop = FALSE]
# Treat the overall rating as a categorical variable.
general_info_cleaned$Hospital.overall.rating <- as.factor(general_info_cleaned$Hospital.overall.rating)
“Data suppressed by CMS for one or more quarters”
This enables us to conclude that the provider IDs associated with these footnote levels can be purged from the dataset.
# Inspect the rows whose overall-rating footnote is one of the two footnotes
# listed below: count rows per combination of the overall rating and the seven
# national-comparison columns, then transpose for reading.
# The footnote column is referenced by bare name so that filter() evaluates it
# against the piped data rather than against the global data frame.
general_info_cleaned %>%
  filter(Hospital.overall.rating.footnote %in% c(
    "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs",
    "Data suppressed by CMS for one or more quarters"
  )) %>%
  group_by(
    Hospital.overall.rating,
    Mortality.national.comparison,
    Readmission.national.comparison,
    Safety.of.care.national.comparison,
    Efficient.use.of.medical.imaging.national.comparison,
    Timeliness.of.care.national.comparison,
    Effectiveness.of.care.national.comparison,
    Patient.experience.national.comparison
  ) %>%
  summarise(count_rws = n()) %>%
  t()
## [,1]
## Hospital.overall.rating NA
## Mortality.national.comparison NA
## Readmission.national.comparison NA
## Safety.of.care.national.comparison NA
## Efficient.use.of.medical.imaging.national.comparison NA
## Timeliness.of.care.national.comparison NA
## Effectiveness.of.care.national.comparison NA
## Patient.experience.national.comparison NA
## count_rws "212"
# Remove the rows carrying either of the two footnotes below. Rows whose
# footnote is NA are kept, because NA %in% <set> evaluates to FALSE.
# The column is referenced by bare name so the test is evaluated on the piped
# data, and the negation is parenthesised to make the precedence explicit.
general_info_cleaned <- general_info_cleaned %>%
  filter(!(Hospital.overall.rating.footnote %in% c(
    "Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs",
    "Data suppressed by CMS for one or more quarters"
  )))
# Relative frequency of each overall rating among the remaining rows.
barplot(prop.table(table(general_info_cleaned$Hospital.overall.rating)))
The barplot of the ratings shows that the hospital overall rating has an approximately Gaussian (bell-shaped) distribution.
“Results are not available for this reporting period”
# Summarise the rows whose overall-rating footnote is one of the two footnotes
# below. The column is referenced by bare name so that filter() evaluates it
# against the piped data rather than against the global data frame.
general_info_cleaned %>%
  filter(Hospital.overall.rating.footnote %in% c(
    "Results are not available for this reporting period",
    "There are too few measures or measure groups reported to calculate a star rating or measure group score"
  )) %>%
  summary()
## Provider.ID Hospital.overall.rating
## Min. : 10018 1 : 0
## 1st Qu.:161340 2 : 0
## Median :261325 3 : 0
## Mean :277742 4 : 0
## 3rd Qu.:400104 5 : 0
## Max. :670112 NA's:958
## Hospital.overall.rating.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 1
## There are too few measures or measure groups reported to calculate a star rating or measure group score :957
## This hospital\x92s star rating only includes data reported on inpatient services : 0
##
## Mortality.national.comparison
## Above the National average : 0
## Below the National average : 0
## Same as the National average:118
## NA's :840
##
##
## Mortality.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :840
## NA's :118
##
##
## Safety.of.care.national.comparison
## Above the National average : 1
## Below the National average : 0
## Same as the National average: 20
## NA's :937
##
##
## Safety.of.care.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :937
## NA's : 21
##
##
## Readmission.national.comparison
## Above the National average : 0
## Below the National average : 12
## Same as the National average:280
## NA's :666
##
##
## Readmission.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :666
## NA's :292
##
##
## Patient.experience.national.comparison
## Above the National average : 67
## Below the National average : 3
## Same as the National average: 18
## NA's :870
##
##
## Patient.experience.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :870
## NA's : 88
##
##
## Effectiveness.of.care.national.comparison
## Above the National average : 0
## Below the National average : 60
## Same as the National average:199
## NA's :699
##
##
## Effectiveness.of.care.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :699
## NA's :259
##
##
## Timeliness.of.care.national.comparison
## Above the National average : 89
## Below the National average : 0
## Same as the National average:105
## NA's :764
##
##
## Timeliness.of.care.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :764
## NA's :194
##
##
## Efficient.use.of.medical.imaging.national.comparison
## Above the National average : 0
## Below the National average : 6
## Same as the National average: 32
## NA's :920
##
##
## Efficient.use.of.medical.imaging.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period :920
## NA's : 38
##
##
# Remove the rows carrying either of the two footnotes below. Rows whose
# footnote is NA are kept, because NA %in% <set> evaluates to FALSE.
# The column is referenced by bare name so the test is evaluated on the piped
# data, and the negation is parenthesised to make the precedence explicit.
general_info_cleaned <- general_info_cleaned %>%
  filter(!(Hospital.overall.rating.footnote %in% c(
    "Results are not available for this reporting period",
    "There are too few measures or measure groups reported to calculate a star rating or measure group score"
  )))
summary(general_info_cleaned)
## Provider.ID Hospital.overall.rating
## Min. : 10001 1: 117
## 1st Qu.:140014 2: 684
## Median :260018 3:1772
## Mean :264497 4: 964
## 3rd Qu.:390113 5: 111
## Max. :670098
## Hospital.overall.rating.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 0
## There are too few measures or measure groups reported to calculate a star rating or measure group score : 0
## This hospital\x92s star rating only includes data reported on inpatient services : 172
## NA's :3476
## Mortality.national.comparison
## Above the National average : 402
## Below the National average : 343
## Same as the National average:2618
## NA's : 285
##
##
## Mortality.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 285
## NA's :3363
##
##
## Safety.of.care.national.comparison
## Above the National average : 804
## Below the National average : 673
## Same as the National average:1428
## NA's : 743
##
##
## Safety.of.care.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 743
## NA's :2905
##
##
## Readmission.national.comparison
## Above the National average : 811
## Below the National average : 860
## Same as the National average:1853
## NA's : 124
##
##
## Readmission.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 124
## NA's :3524
##
##
## Patient.experience.national.comparison
## Above the National average :1135
## Below the National average :1097
## Same as the National average:1143
## NA's : 273
##
##
## Patient.experience.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 273
## NA's :3375
##
##
## Effectiveness.of.care.national.comparison
## Above the National average : 997
## Below the National average : 449
## Same as the National average:1997
## NA's : 205
##
##
## Effectiveness.of.care.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 205
## NA's :3443
##
##
## Timeliness.of.care.national.comparison
## Above the National average :1014
## Below the National average : 903
## Same as the National average:1376
## NA's : 355
##
##
## Timeliness.of.care.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 355
## NA's :3293
##
##
## Efficient.use.of.medical.imaging.national.comparison
## Above the National average : 359
## Below the National average : 368
## Same as the National average:2027
## NA's : 894
##
##
## Efficient.use.of.medical.imaging.national.comparison.footnote
## Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs: 0
## Data suppressed by CMS for one or more quarters : 0
## Results are not available for this reporting period : 894
## NA's :2754
##
##
# Positions of the two columns carried into the final table
# (the provider key and the overall rating).
z_rem_var1 <- which(is.element(
  names(general_info_cleaned),
  c("Provider.ID", "Hospital.overall.rating")
))
# Keep only the rows that have no overall-rating footnote.
general_info_final <- general_info_cleaned[
  is.na(general_info_cleaned[["Hospital.overall.rating.footnote"]]),
  z_rem_var1
]
summary(general_info_final)
## Provider.ID Hospital.overall.rating
## Min. : 10001 1: 115
## 1st Qu.:140053 2: 661
## Median :260004 3:1668
## Mean :264810 4: 921
## 3rd Qu.:390100 5: 111
## Max. :670098
# Row and column count of the final general-information table.
c(nrow(general_info_final), ncol(general_info_final))
## [1] 3476 2
# Load the hospital complications extract. Blank cells and the literal
# "Not Available" are read as NA; strings are read as factors. TRUE is spelled
# out because T is an ordinary variable that can be reassigned.
complications_df <- read.csv(
  file = "ValidFiles//Complications - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  stringsAsFactors = TRUE,
  na.strings = c("Not Available", "")
)
head(complications_df)
# Address / contact columns and the measurement-period columns to remove.
zdemographics <- c("Hospital.Name", "Address", "City", "State", "ZIP.Code", "County.Name", "Phone.Number", "Measure.Start.Date", "Measure.End.Date")
zdemogrphic_vars <- which(names(complications_df) %in% zdemographics)
# Drop the columns with a logical mask rather than a negated which():
# x[, -integer(0)] selects zero columns, so the negated-index form would
# silently empty the table if none of the names were present.
complications_df_cleaned <- complications_df[, !(names(complications_df) %in% zdemographics), drop = FALSE]
head(complications_df_cleaned)
# Percentage share of each Compared.to.National level, NAs included.
round(prop.table(summary(factor(complications_df_cleaned$Compared.to.National))) * 100, 2) #- Thus NAs are 35% in the Compared.To.National variable
## Better than the National Rate No Different than the National Rate
## 0.79 56.30
## Number of Cases Too Small Worse than the National Rate
## 6.54 1.64
## NA's
## 34.74
# Percentage share of each Footnote level, NAs included.
round(prop.table(summary(factor(complications_df_cleaned$Footnote)))*100,2) #- NAs are ~59% in the Footnote variable
## 1 - The number of cases/patients is too few to report.
## 6.54
## 13 - Results cannot be calculated for this reporting period.
## 24.70
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.
## 3.38
## 4 - Data suppressed by CMS for one or more quarters.
## 1.03
## 5 - Results are not available for this reporting period.
## 3.68
## 7 - No cases met the criteria for this measure.
## 1.94
## NA's
## 58.73
# Distribution of Score by Footnote: row count, mean score and total score
# per footnote level, ordered by mean score.
complications_df_cleaned %>%
  group_by(Footnote) %>%
  summarise(
    cnt_rows = n(),
    Avg_Score = mean(Score, na.rm = TRUE),
    Total_Score = sum(Score, na.rm = TRUE)
  ) %>%
  arrange(Avg_Score)
# Row mask for blank footnotes, taken from the same frame that is subset below
# so the mask and the data cannot get out of step.
zvar1 <- is.na(complications_df_cleaned$Footnote) # Blank Footnotes
zvar2 <- which(names(complications_df_cleaned) %in% "Footnote")
# Keep the blank-footnote rows and drop the Footnote column itself. A logical
# column mask is used because x[, -integer(0)] would select zero columns if the
# column were absent.
complications_df_cleaned_footnote_nas <- complications_df_cleaned[
  zvar1,
  !(names(complications_df_cleaned) %in% "Footnote"),
  drop = FALSE
]
summary(complications_df_cleaned_footnote_nas)
## Provider.ID Measure.Name
## Min. : 10001 Serious complications : 3224
## 1st Qu.:110105 Collapsed lung due to medical treatment : 3202
## Median :260017 Accidental cuts and tears from medical treatment: 3199
## Mean :262101 Infections from a large venous catheter : 3191
## 3rd Qu.:390116 Pressure sores : 3078
## Max. :670106 Serious blood clots after surgery : 2984
## (Other) :12246
## Measure.ID
## PSI_90_SAFETY : 3224
## PSI_6_IAT_PTX : 3202
## PSI_15_ACC_LAC : 3199
## PSI_7_CVCBI : 3191
## PSI_3_ULCER : 3078
## PSI_12_POSTOP_PULMEMB_DVT: 2984
## (Other) :12246
## Compared.to.National Denominator
## Better than the National Rate : 419 Min. : 25
## No Different than the National Rate:29837 1st Qu.: 163
## Number of Cases Too Small : 0 Median : 697
## Worse than the National Rate : 868 Mean : 2260
## 3rd Qu.: 2713
## Max. :75770
## NA's :3224
## Score Lower.Estimate Higher.Estimate
## Min. : 0.030 Min. : 0.00 Min. : 0.06
## 1st Qu.: 0.330 1st Qu.: 0.00 1st Qu.: 0.71
## Median : 1.065 Median : 0.18 Median : 1.88
## Mean : 10.035 Mean : 6.11 Mean : 14.11
## 3rd Qu.: 3.200 3rd Qu.: 1.38 3rd Qu.: 5.33
## Max. :212.160 Max. :175.29 Max. :249.04
##
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Per measure: number of rows, total score and mean score, reshaped to long
# form and drawn as one bar chart per statistic (independent axis scales).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
complications_df_cleaned_footnote_nas %>%
  group_by(Measure.ID) %>%
  summarise(
    cnt_rws = n(),
    Sum_score = sum(Score, na.rm = TRUE),
    mean_Score = mean(Score, na.rm = TRUE)
  ) %>%
  arrange(desc(cnt_rws)) %>%
  melt(value.name = c("value")) %>%
  ggplot(aes(x = Measure.ID, y = value)) +
  geom_col() +
  facet_wrap(facets = ~ variable, scales = "free", ncol = 3) +
  # Rotate the measure labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## Using Measure.ID as id variables
# Summarise the blank-footnote rows that have no Denominator value.
complications_df_cleaned_footnote_nas %>%
  filter(is.na(Denominator)) %>%
  summary()
## Provider.ID
## Min. : 10001
## 1st Qu.:110110
## Median :260024
## Mean :263357
## 3rd Qu.:390148
## Max. :670106
##
## Measure.Name
## Serious complications :3224
## A wound that splits open after surgery on the abdomen or pelvis: 0
## Accidental cuts and tears from medical treatment : 0
## Blood stream infection after surgery : 0
## Broken hip from a fall after surgery : 0
## Collapsed lung due to medical treatment : 0
## (Other) : 0
## Measure.ID
## PSI_90_SAFETY :3224
## COMP_HIP_KNEE : 0
## PSI_12_POSTOP_PULMEMB_DVT: 0
## PSI_13_POST_SEPSIS : 0
## PSI_14_POSTOP_DEHIS : 0
## PSI_15_ACC_LAC : 0
## (Other) : 0
## Compared.to.National Denominator
## Better than the National Rate : 101 Min. : NA
## No Different than the National Rate:2937 1st Qu.: NA
## Number of Cases Too Small : 0 Median : NA
## Worse than the National Rate : 186 Mean :NaN
## 3rd Qu.: NA
## Max. : NA
## NA's :3224
## Score Lower.Estimate Higher.Estimate
## Min. :0.440 Min. :0.1500 Min. :0.660
## 1st Qu.:0.790 1st Qu.:0.3900 1st Qu.:1.110
## Median :0.875 Median :0.4900 Median :1.250
## Mean :0.890 Mean :0.5378 Mean :1.242
## 3rd Qu.:0.960 3rd Qu.:0.6400 3rd Qu.:1.370
## Max. :2.140 Max. :1.9400 Max. :2.460
##
The significant measures among these measure IDs are “PSI_90_SAFETY” and “PSI_4_SURG_COMP”; “COMP_HIP_KNEE” is the other significant measure.
The important measures are retained below:
# Columns needed to pivot the scores: provider key, measure identifier, score.
zvar1 <- c("Provider.ID", "Measure.ID", "Score")
zvar2 <- which(is.element(names(complications_df_cleaned_footnote_nas), zvar1))
# One row per provider, one column per Measure.ID, filled with Score.
complications_df_final <- spread(
  complications_df_cleaned_footnote_nas[, zvar2],
  key = Measure.ID,
  value = Score
)
summary(complications_df_final)
## Provider.ID COMP_HIP_KNEE PSI_12_POSTOP_PULMEMB_DVT
## Min. : 10001 Min. :1.500 Min. : 1.390
## 1st Qu.:110234 1st Qu.:2.700 1st Qu.: 3.870
## Median :260063 Median :3.000 Median : 4.750
## Mean :264984 Mean :3.042 Mean : 5.035
## 3rd Qu.:390146 3rd Qu.:3.400 3rd Qu.: 5.763
## Max. :670106 Max. :6.000 Max. :20.880
## NA's :770 NA's :500
## PSI_13_POST_SEPSIS PSI_14_POSTOP_DEHIS PSI_15_ACC_LAC PSI_3_ULCER
## Min. : 4.50 Min. :1.180 Min. :0.320 Min. : 0.0300
## 1st Qu.: 9.04 1st Qu.:2.130 1st Qu.:1.140 1st Qu.: 0.2100
## Median : 9.78 Median :2.230 Median :1.380 Median : 0.3400
## Mean :10.25 Mean :2.319 Mean :1.440 Mean : 0.4516
## 3rd Qu.:11.26 3rd Qu.:2.470 3rd Qu.:1.665 3rd Qu.: 0.4600
## Max. :27.96 Max. :4.980 Max. :6.180 Max. :10.3500
## NA's :1176 NA's :944 NA's :285 NA's :406
## PSI_4_SURG_COMP PSI_6_IAT_PTX PSI_7_CVCBI PSI_8_POST_HIP
## Min. : 70.79 Min. :0.1900 Min. :0.0300 Min. :0.06
## 1st Qu.:124.44 1st Qu.:0.3600 1st Qu.:0.1300 1st Qu.:0.06
## Median :135.57 Median :0.4000 Median :0.1600 Median :0.06
## Mean :136.76 Mean :0.4067 Mean :0.1694 Mean :0.06
## 3rd Qu.:148.13 3rd Qu.:0.4400 3rd Qu.:0.1700 3rd Qu.:0.06
## Max. :212.16 Max. :0.8800 Max. :1.2300 Max. :0.06
## NA's :1666 NA's :282 NA's :293 NA's :618
## PSI_90_SAFETY
## Min. :0.440
## 1st Qu.:0.790
## Median :0.875
## Mean :0.890
## 3rd Qu.:0.960
## Max. :2.140
## NA's :260
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Scatter-plot matrix of every measure column. The first column (Provider.ID)
# is excluded with a negative index, matching the cor.plot() call further down
# and avoiding 2:ncol(x), which counts backwards when only one column exists.
pairs.panels(
  x = complications_df_final[-1],
  bg = rainbow(n = 12),
  smooth = TRUE,
  ellipses = TRUE,
  pch = 21,
  cex.cor = 0.05,
  cex.labels = 0.5
)
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in min(diff(breaks)): no non-missing arguments to min; returning
## Inf
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
## Warning in cor(x, y, use = "pairwise", method = method): the standard
## deviation is zero
# Correlation plot of the measure columns (Provider.ID excluded), with the
# coefficients printed in the cells and no legend. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
cor.plot(
  complications_df_final[, -1],
  numbers = TRUE,
  show.legend = FALSE,
  cex = 0.7,
  xlas = 2,
  cex.axis = 0.7
)
## Warning in cor(x, use = use, method = method): the standard deviation is
## zero
# Keep the provider key and the three measures singled out as significant.
zvar1 <- which(is.element(
  names(complications_df_final),
  c("Provider.ID", "PSI_90_SAFETY", "COMP_HIP_KNEE", "PSI_4_SURG_COMP")
))
complications_df_final <- complications_df_final[, zvar1]
# Load the healthcare-associated-infections extract. Blank cells and the
# literal "Not Available" are read as NA.
# The path is relative to the working directory, like the other two reads in
# this script, instead of pointing into one user's home directory.
# NOTE(review): assumes this CSV sits in the same ValidFiles/ folder as the
# other inputs - confirm.
# stringsAsFactors is set explicitly because its default is FALSE from R 4.0
# onwards, while the summaries below report factor level counts.
hai_df <- read.csv(
  file = "ValidFiles/Healthcare Associated Infections - Hospital.csv",
  na.strings = c("Not Available", ""),
  stringsAsFactors = TRUE
)
head(hai_df)
summary(hai_df)
## Provider.ID Hospital.Name
## Min. : 10001 MEMORIAL HOSPITAL : 720
## 1st Qu.:140185 COMMUNITY MEMORIAL HOSPITAL : 384
## Median :260037 GOOD SAMARITAN HOSPITAL : 336
## Mean :267984 SHRINERS HOSPITALS FOR CHILDREN: 336
## 3rd Qu.:390211 ST JOSEPH HOSPITAL : 336
## Max. :670112 MEMORIAL MEDICAL CENTER : 288
## (Other) :228864
## Address City State
## 100 HOSPITAL DRIVE : 336 CHICAGO : 1392 TX : 19536
## 100 MEDICAL CENTER DRIVE: 192 HOUSTON : 1248 CA : 16560
## 200 HOSPITAL DRIVE : 192 LOS ANGELES : 960 FL : 8976
## ONE HOSPITAL DRIVE : 144 DALLAS : 912 IL : 8592
## 1 HOSPITAL DRIVE : 96 PHILADELPHIA: 912 NY : 8352
## 1 MEDICAL CENTER DRIVE : 96 COLUMBUS : 864 PA : 8160
## (Other) :230208 (Other) :224976 (Other):161088
## ZIP.Code County.Name Phone.Number
## Min. : 603 LOS ANGELES: 3984 Min. :9.369e+08
## 1st Qu.:33013 COOK : 2448 1st Qu.:4.018e+09
## Median :55396 JEFFERSON : 2256 Median :6.053e+09
## Mean :53956 MONTGOMERY : 2112 Mean :5.849e+09
## 3rd Qu.:76020 WASHINGTON : 2112 3rd Qu.:7.878e+09
## Max. :99929 (Other) :217632 Max. :9.899e+09
## NA's : 720
## Measure.Name
## C.diff Lower Confidence Limit : 4818
## C.diff Observed Cases : 4818
## C.diff Patient Days : 4818
## C.diff Predicted Cases : 4818
## C.diff Upper Confidence Limit : 4818
## Catheter-associated urinary tract infections (CAUTI) in ICUs and select wards: 4818
## (Other) :202356
## Measure.ID Compared.to.National
## HAI_1_CI_LOWER : 4818 Better than the National Benchmark : 3842
## HAI_1_CI_UPPER : 4818 No Different than National Benchmark: 12884
## HAI_1_DOPC_DAYS: 4818 Worse than the National Benchmark : 808
## HAI_1_ELIGCASES: 4818 NA's :213730
## HAI_1_NUMERATOR: 4818
## HAI_1_SIR : 4818
## (Other) :202356
## Score
## Min. : 0.0
## 1st Qu.: 0.4
## Median : 1.5
## Mean : 2745.3
## 3rd Qu.: 14.1
## Max. :702243.0
## NA's :101886
## Footnote
## 5 - Results are not available for this reporting period. : 33490
## 12 - This measure does not apply to this hospital for this reporting period. : 29598
## 13 - Results cannot be calculated for this reporting period. : 26307
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 7920
## 8 - The lower limit of the confidence interval cannot be calculated if the number of observed infections equals zero. : 3302
## (Other) : 2756
## NA's :127891
## Measure.Start.Date Measure.End.Date
## 1/1/2015:231264 12/31/2015:231264
##
##
##
##
##
##
# Address / contact columns and the measurement-period columns to remove.
zdemographics <- c("Hospital.Name", "Address", "City", "State", "County.Name", "Phone.Number", "ZIP.Code", "Measure.Start.Date", "Measure.End.Date")
# Drop the columns with a logical mask rather than a negated which():
# x[, -integer(0)] selects zero columns, so the negated-index form would
# silently empty the table if none of the names were present.
hai_df_cleaned <- hai_df[, !(names(hai_df) %in% zdemographics), drop = FALSE]
str(hai_df_cleaned)
## 'data.frame': 231264 obs. of 6 variables:
## $ Provider.ID : int 10001 10001 10001 10001 10001 10001 10001 10001 10001 10001 ...
## $ Measure.Name : Factor w/ 48 levels "C.diff Lower Confidence Limit",..: 25 29 26 28 27 19 21 24 20 23 ...
## $ Measure.ID : Factor w/ 48 levels "HAI_1_CI_LOWER",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Compared.to.National: Factor w/ 3 levels "Better than the National Benchmark",..: NA NA NA NA NA 3 NA NA NA NA ...
## $ Score : num 1.03 2.77 7117 9.2 16 ...
## $ Footnote : Factor w/ 8 levels "12 - This measure does not apply to this hospital for this reporting period.",..: NA NA NA NA NA NA NA NA NA NA ...
# Column-wise summary of the trimmed infections table.
hai_df_cleaned %>% summary()
## Provider.ID
## Min. : 10001
## 1st Qu.:140185
## Median :260037
## Mean :267984
## 3rd Qu.:390211
## Max. :670112
##
## Measure.Name
## C.diff Lower Confidence Limit : 4818
## C.diff Observed Cases : 4818
## C.diff Patient Days : 4818
## C.diff Predicted Cases : 4818
## C.diff Upper Confidence Limit : 4818
## Catheter-associated urinary tract infections (CAUTI) in ICUs and select wards: 4818
## (Other) :202356
## Measure.ID Compared.to.National
## HAI_1_CI_LOWER : 4818 Better than the National Benchmark : 3842
## HAI_1_CI_UPPER : 4818 No Different than National Benchmark: 12884
## HAI_1_DOPC_DAYS: 4818 Worse than the National Benchmark : 808
## HAI_1_ELIGCASES: 4818 NA's :213730
## HAI_1_NUMERATOR: 4818
## HAI_1_SIR : 4818
## (Other) :202356
## Score
## Min. : 0.0
## 1st Qu.: 0.4
## Median : 1.5
## Mean : 2745.3
## 3rd Qu.: 14.1
## Max. :702243.0
## NA's :101886
## Footnote
## 5 - Results are not available for this reporting period. : 33490
## 12 - This measure does not apply to this hospital for this reporting period. : 29598
## 13 - Results cannot be calculated for this reporting period. : 26307
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 7920
## 8 - The lower limit of the confidence interval cannot be calculated if the number of observed infections equals zero. : 3302
## (Other) : 2756
## NA's :127891
# Row count and total score per footnote level.
hai_df_cleaned %>%
  group_by(Footnote) %>%
  summarise(count_rows = n(), score_total = sum(Score, na.rm = TRUE))
zvar1 <- which(names(hai_df_cleaned) %in% c("Footnote"))
# Keep only the rows without a footnote, then drop the Footnote column itself.
# A logical column mask is used because x[, -integer(0)] would select zero
# columns if the column were absent.
hai_df_cleaned <- hai_df_cleaned[
  is.na(hai_df_cleaned$Footnote),
  !(names(hai_df_cleaned) %in% c("Footnote")),
  drop = FALSE
]
summary(hai_df_cleaned)
## Provider.ID Measure.Name
## Min. : 10001 CAUTI: Number of Urinary Catheter Days: 3641
## 1st Qu.:110082 CAUTI: Observed Cases : 3641
## Median :250050 CAUTI: Predicted Cases : 3641
## Mean :259546 C.diff Patient Days : 3595
## 3rd Qu.:390096 C.diff Observed Cases : 3594
## Max. :670103 C.diff Predicted Cases : 3594
## (Other) :106185
## Measure.ID Compared.to.National
## HAI_2_DOPC_DAYS: 3641 Better than the National Benchmark : 3828
## HAI_2_ELIGCASES: 3641 No Different than National Benchmark: 12833
## HAI_2_NUMERATOR: 3641 Worse than the National Benchmark : 807
## HAI_6_DOPC_DAYS: 3595 NA's :110423
## HAI_6_ELIGCASES: 3594
## HAI_6_NUMERATOR: 3594
## (Other) :106185
## Score
## Min. : 0.0
## 1st Qu.: 0.4
## Median : 1.6
## Mean : 2770.2
## 3rd Qu.: 14.1
## Max. :702243.0
##
# Box plot of Score for every measure. The x aesthetic uses the bare column
# name so that ggplot2 looks it up in `data`; a `df$col` reference inside aes()
# is detached from the plot data and breaks under faceting or a swapped
# data set.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_boxplot() +
  xlab("Measure.ID") +
  # Rotate the measure labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90))
# Row count for every measure name / measure ID pair in the raw table.
hai_df %>%
  group_by(Measure.Name, Measure.ID) %>%
  summarise(cnt_rws = n())
-Clostridium difficile (C. difficile) is a bacteria that causes diarrhea and can lead to serious complications. Those at highest risk for C. difficile infection include people who take antibiotics and also receive care in any medical setting, including hospitals. C. difficile bacteria produce spores that can be spread from patient to patient. Symptoms from C. diff infections often take a few days to develop. Patients are tested for C. diff. infections if they show signs of illness while in the hospital. This measure compares the number of stool specimens that tested positive for C. diff toxin four or more days after the patient entered the hospital to a national benchmark.
CAUTI -Catheter Associated Urinary Tract Infections: A urinary tract infection (UTI) is an infection involving any part of the urinary system, including urethra, bladder, ureters, and kidney. UTIs are the most common type of healthcare-associated infection reported to the National Healthcare Safety Network (NHSN). Among UTIs acquired in the hospital, approximately 75% are associated with a urinary catheter, which is a tube inserted into the bladder through the urethra to drain urine. Between 15-25% of hospitalized patients receive urinary catheters during their hospital stay. The most important risk factor for developing a catheter-associated UTI (CAUTI) is prolonged use of the urinary catheter. Therefore, catheters should only be used for appropriate indications and should be removed as soon as they are no longer needed.
Considering the SIRs: the Centers for Disease Control and Prevention calculates the Standardized Infection Ratio (SIR), which takes into account patient care location, number of patients with an existing infection, lab methods, affiliation with a medical school, bed size of the hospital, and age of patients.
The HAI measures cover central line-associated bloodstream infections (CLABSI), catheter-associated urinary tract infections (CAUTI), surgical site infection (SSI) from colon surgery or abdominal hysterectomy, methicillin-resistant Staphylococcus Aureus (MRSA) blood laboratory-identified events (bloodstream infections), and Clostridium difficile (C.diff.) laboratory-identified events (intestinal infections). The HAI measures show how often patients in a particular hospital contract certain infections during the course of their medical treatment.
HAI_1_SIR, HAI_1a_SIR, HAI_2_SIR, HAI_2a_SIR, HAI_6_SIR,HAI_4_SIR, HAI_5_SIR, HAI_3_SIR
HAI-1 measure tracks central-line associated bloodstream infections (CLABSI) in ICUs and select wards. HAI-2 measure tracks catheter-associated urinary tract infections (CAUTI) in ICUs and select wards. HAI-3 Surgical Site Infection from colon surgery (SSI: Colon) HAI-4 Surgical Site Infection from abdominal hysterectomy (SSI: Hysterectomy) HAI-5 Methicillin-resistant Staphylococcus Aureus (MRSA) Blood Laboratory-identified Events (Bloodstream infections) HAI-6 Clostridium difficile (C.diff.) Laboratory-identified Events (Intestinal infections)
# The six Standardized Infection Ratio measures (HAI_1_SIR ... HAI_6_SIR).
hai_measures <- paste0("HAI_", 1:6, "_SIR")

# Filtering the Measure.IDs useful for analysis
hai_df_cleaned <- hai_df_cleaned[hai_df_cleaned$Measure.ID %in% hai_measures, ]

# Score distribution per SIR measure.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_boxplot(na.rm = TRUE) +
  labs(x = "Measure.ID") +
  theme(axis.text.x = element_text(angle = 90))

# Stacked scores per SIR measure.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID, y = Score)) +
  geom_col(position = position_stack(), na.rm = TRUE) +
  labs(
    x = "Measure.ID",
    title = "Hospital Associated Infections by Measure and Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Number of rows per SIR measure.
ggplot(data = hai_df_cleaned, aes(x = Measure.ID)) +
  geom_bar(position = position_stack(), na.rm = TRUE) +
  labs(
    x = "Measure.ID",
    title = "Hospital Associated Infections by Measure by Hospital Count"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Reshape to one row per provider with one column per SIR measure.
zvar1 <- c("Provider.ID", "Measure.ID", "Score")
zvar2 <- which(names(hai_df_cleaned) %in% zvar1)
hai_df_final <- spread(hai_df_cleaned[, zvar2], key = "Measure.ID", value = "Score")
summary(hai_df_final)
## Provider.ID HAI_1_SIR HAI_2_SIR HAI_3_SIR
## Min. : 10001 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:110200 1st Qu.:0.2070 1st Qu.:0.1270 1st Qu.:0.3998
## Median :250110 Median :0.4640 Median :0.4205 Median :0.8325
## Mean :262973 Mean :0.5408 Mean :0.4787 Mean :0.9642
## 3rd Qu.:390155 3rd Qu.:0.7465 3rd Qu.:0.7008 3rd Qu.:1.4085
## Max. :670098 Max. :5.1900 Max. :7.0140 Max. :5.2160
## NA's :943 NA's :432 NA's :1270
## HAI_4_SIR HAI_5_SIR HAI_6_SIR
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.4280 1st Qu.:0.4920
## Median :0.6985 Median :0.8240 Median :0.7985
## Mean :0.8706 Mean :0.9892 Mean :0.8170
## 3rd Qu.:1.3390 3rd Qu.:1.3800 3rd Qu.:1.1140
## Max. :4.8190 Max. :7.0170 Max. :3.7410
## NA's :2454 NA's :1485 NA's :64
# Full outer join of general information with complications on every column
# the two tables share.
gen_inf_compli_df <- merge(
  general_info_final,
  complications_df_final,
  by = intersect(names(general_info_final), names(complications_df_final)),
  all = TRUE
)

# Full outer join of the result with the HAI measures, again on shared columns.
safety_of_care_group <- merge(
  gen_inf_compli_df,
  hai_df_final,
  by = intersect(names(gen_inf_compli_df), names(hai_df_final)),
  all = TRUE
)
head(safety_of_care_group)
# Load the HCAHPS survey file; the listed strings are read as NA.
# stringsAsFactors is set explicitly (as in the other read.csv() calls of this
# file): the later levels()/summary() calls on HCAHPS.Measure.ID need factor
# columns, and the read.csv() default changed to FALSE in R 4.0.0.
hcaphs_df <- read.csv(
  file = "ValidFiles/HCAHPS - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  na.strings = c("Not Available", "Not Applicable", ""),
  stringsAsFactors = TRUE
)

# Print the demographic column names (defined earlier in the file).
zdemographics
## [1] "Hospital.Name" "Address" "City"
## [4] "State" "County.Name" "Phone.Number"
## [7] "ZIP.Code" "Measure.Start.Date" "Measure.End.Date"
# Drop the demographic columns. A logical mask is used instead of
# `-which(...)`: with no matching column, `-integer(0)` would select zero
# columns rather than keep all of them.
hcaphs_df_cleaned <- hcaphs_df[, !(names(hcaphs_df) %in% zdemographics), drop = FALSE]
H_COMP_1_LINEAR_SCORE, H_COMP_2_LINEAR_SCORE, H_COMP_3_LINEAR_SCORE, H_COMP_4_LINEAR_SCORE, H_COMP_5_LINEAR_SCORE, H_COMP_6_LINEAR_SCORE, H_COMP_7_LINEAR_SCORE, H_HSP_RATING_LINEAR_SCORE, H_QUIET_LINEAR_SCORE,H_RECMND_LINEAR_SCORE, H_CLEAN_LINEAR_SCORE,
H_COMP_1, H_COMP_2, H_COMP_3, H_COMP_4, H_COMP_5, H_COMP_6, H_COMP_7, H_HSP_RATING, H_QUIET,H_RECMND, H_CLEAN_HSP
# List every distinct HCAHPS measure id present in the raw file.
levels(hcaphs_df[["HCAHPS.Measure.ID"]])
## [1] "H_CLEAN_HSP_A_P" "H_CLEAN_HSP_SN_P"
## [3] "H_CLEAN_HSP_U_P" "H_CLEAN_LINEAR_SCORE"
## [5] "H_CLEAN_STAR_RATING" "H_COMP_1_A_P"
## [7] "H_COMP_1_LINEAR_SCORE" "H_COMP_1_SN_P"
## [9] "H_COMP_1_STAR_RATING" "H_COMP_1_U_P"
## [11] "H_COMP_2_A_P" "H_COMP_2_LINEAR_SCORE"
## [13] "H_COMP_2_SN_P" "H_COMP_2_STAR_RATING"
## [15] "H_COMP_2_U_P" "H_COMP_3_A_P"
## [17] "H_COMP_3_LINEAR_SCORE" "H_COMP_3_SN_P"
## [19] "H_COMP_3_STAR_RATING" "H_COMP_3_U_P"
## [21] "H_COMP_4_A_P" "H_COMP_4_LINEAR_SCORE"
## [23] "H_COMP_4_SN_P" "H_COMP_4_STAR_RATING"
## [25] "H_COMP_4_U_P" "H_COMP_5_A_P"
## [27] "H_COMP_5_LINEAR_SCORE" "H_COMP_5_SN_P"
## [29] "H_COMP_5_STAR_RATING" "H_COMP_5_U_P"
## [31] "H_COMP_6_LINEAR_SCORE" "H_COMP_6_N_P"
## [33] "H_COMP_6_STAR_RATING" "H_COMP_6_Y_P"
## [35] "H_COMP_7_A" "H_COMP_7_D_SD"
## [37] "H_COMP_7_LINEAR_SCORE" "H_COMP_7_SA"
## [39] "H_COMP_7_STAR_RATING" "H_HSP_RATING_0_6"
## [41] "H_HSP_RATING_7_8" "H_HSP_RATING_9_10"
## [43] "H_HSP_RATING_LINEAR_SCORE" "H_HSP_RATING_STAR_RATING"
## [45] "H_QUIET_HSP_A_P" "H_QUIET_HSP_SN_P"
## [47] "H_QUIET_HSP_U_P" "H_QUIET_LINEAR_SCORE"
## [49] "H_QUIET_STAR_RATING" "H_RECMND_DN"
## [51] "H_RECMND_DY" "H_RECMND_LINEAR_SCORE"
## [53] "H_RECMND_PY" "H_RECMND_STAR_RATING"
## [55] "H_STAR_RATING"
# HCAHPS linear-score measure ids retained for the analysis.
hcaphs_measures <- c(
  "H_COMP_1_LINEAR_SCORE", "H_COMP_2_LINEAR_SCORE", "H_COMP_3_LINEAR_SCORE",
  "H_COMP_4_LINEAR_SCORE", "H_COMP_5_LINEAR_SCORE", "H_COMP_6_LINEAR_SCORE",
  "H_COMP_7_LINEAR_SCORE", "H_HSP_RATING_LINEAR_SCORE", "H_QUIET_LINEAR_SCORE",
  "H_RECMND_LINEAR_SCORE", "H_CLEAN_LINEAR_SCORE"
)

# Keep only the linear-score rows. The column is referenced by bare name:
# `df$col` inside filter() ignores dplyr's data masking and silently misaligns
# if the piped data ever differs from the global object.
hcaphs_df_measures <- hcaphs_df_cleaned %>%
  filter(HCAHPS.Measure.ID %in% hcaphs_measures)

# Strip the "_LINEAR_SCORE" suffix and rebuild the factor so that it only
# carries the shortened ids that are actually present.
hcaphs_df_measures$HCAHPS.Measure.ID <- hcaphs_df_measures$HCAHPS.Measure.ID %>%
  as.character() %>%
  str_replace(pattern = "_LINEAR_SCORE", replacement = "") %>%
  as.factor()

# Columns used to inspect how missing scores relate to the footnotes.
zdistri_vars <- c(
  "Provider.ID", "HCAHPS.Measure.ID", "HCAHPS.Linear.Mean.Value",
  "Number.of.Completed.Surveys", "Survey.Response.Rate.Percent.Footnote",
  "Number.of.Completed.Surveys.Footnote"
)
hcaphs_df_measures_distri <- hcaphs_df_measures[, zdistri_vars]

# Summary restricted to the rows whose linear mean value is missing.
hcaphs_df_measures_distri[is.na(hcaphs_df_measures_distri$HCAHPS.Linear.Mean.Value), ] %>%
  summary()
## Provider.ID HCAHPS.Measure.ID HCAHPS.Linear.Mean.Value
## Min. : 10008 H_CLEAN :1310 Min. : NA
## 1st Qu.:161366 H_COMP_1:1310 1st Qu.: NA
## Median :271317 H_COMP_2:1310 Median : NA
## Mean :284193 H_COMP_3:1310 Mean :NaN
## 3rd Qu.:400130 H_COMP_4:1310 3rd Qu.: NA
## Max. :670112 H_COMP_5:1310 Max. : NA
## (Other) :6550 NA's :14410
## Number.of.Completed.Surveys
## FEWER THAN 50:3971
## 54 : 165
## 80 : 132
## 58 : 121
## 52 : 110
## (Other) :3564
## NA's :6347
## Survey.Response.Rate.Percent.Footnote
## 5 - Results are not available for this reporting period. :4235
## 6 - Fewer than 100 patients completed the HCAHPS survey. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance. :3916
## 10 - Very few patients were eligible for the HCAHPS survey. The scores shown reflect fewer than 50 completed surveys. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance. :3850
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs. :1815
## 1 - The number of cases/patients is too few to report. : 297
## 6 - Fewer than 100 patients completed the HCAHPS survey. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance., 11 - There were discrepancies in the data collection process.: 143
## (Other) : 154
## Number.of.Completed.Surveys.Footnote
## 1 - The number of cases/patients is too few to report. : 297
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.:1815
## 5 - Results are not available for this reporting period. :4235
## NA's :8063
##
##
##
Which measures have NA scores?
Eliminating the rows that have NA scores: 1. too few cases to report, 2. results are unavailable for the current reporting period, 3. data specific to IQR and OQR.
# Summary restricted to the rows that carry no survey-response-rate footnote.
summary(
  subset(hcaphs_df_measures_distri, is.na(Survey.Response.Rate.Percent.Footnote))
)
## Provider.ID HCAHPS.Measure.ID HCAHPS.Linear.Mean.Value
## Min. : 10001 H_CLEAN : 3441 Min. : 58.00
## 1st Qu.:130028 H_COMP_1: 3441 1st Qu.: 83.00
## Median :250084 H_COMP_2: 3441 Median : 87.00
## Mean :262769 H_COMP_3: 3441 Mean : 86.41
## 3rd Qu.:390100 H_COMP_4: 3441 3rd Qu.: 90.00
## Max. :670103 H_COMP_5: 3441 Max. :100.00
## (Other) :17205
## Number.of.Completed.Surveys
## 320 : 121
## 322 : 110
## 341 : 110
## 104 : 99
## 130 : 99
## 138 : 99
## (Other):37213
## Survey.Response.Rate.Percent.Footnote
## 1 - The number of cases/patients is too few to report. : 0
## 10 - Very few patients were eligible for the HCAHPS survey. The scores shown reflect fewer than 50 completed surveys. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance. : 0
## 10 - Very few patients were eligible for the HCAHPS survey. The scores shown reflect fewer than 50 completed surveys. Use these scores with caution, as the number of surveys may be too low to reliably assess hospital performance., 11 - There were discrepancies in the data collection process.: 0
## 11 - There were discrepancies in the data collection process. : 0
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs. : 0
## (Other) : 0
## NA's :37851
## Number.of.Completed.Surveys.Footnote
## 1 - The number of cases/patients is too few to report. : 0
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 0
## 5 - Results are not available for this reporting period. : 0
## NA's :37851
##
##
##
# Keep the rows without a survey-response-rate footnote and only the three
# columns needed for reshaping. Columns are selected by name rather than by
# position (1, 2, 3) so the step does not depend on the column order.
hcaphs_df_measures_distri <- hcaphs_df_measures_distri[
  is.na(hcaphs_df_measures_distri$Survey.Response.Rate.Percent.Footnote),
  c("Provider.ID", "HCAHPS.Measure.ID", "HCAHPS.Linear.Mean.Value")
]

# One row per provider, one column per HCAHPS measure.
patient_exp_group <- hcaphs_df_measures_distri %>%
  spread(key = HCAHPS.Measure.ID, value = HCAHPS.Linear.Mean.Value)
summary(patient_exp_group)
## Provider.ID H_CLEAN H_COMP_1 H_COMP_2
## Min. : 10001 Min. :71.00 Min. : 73.00 Min. : 75.00
## 1st Qu.:130028 1st Qu.:85.00 1st Qu.: 90.00 1st Qu.: 90.00
## Median :250084 Median :87.00 Median : 92.00 Median : 92.00
## Mean :262769 Mean :87.31 Mean : 91.34 Mean : 91.91
## 3rd Qu.:390100 3rd Qu.:90.00 3rd Qu.: 93.00 3rd Qu.: 93.00
## Max. :670103 Max. :99.00 Max. :100.00 Max. :100.00
## H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6
## Min. :64.0 Min. : 70.00 Min. :58.00 Min. :62.00
## 1st Qu.:83.0 1st Qu.: 86.00 1st Qu.:76.00 1st Qu.:85.00
## Median :85.0 Median : 88.00 Median :79.00 Median :87.00
## Mean :85.3 Mean : 87.58 Mean :78.78 Mean :86.93
## 3rd Qu.:88.0 3rd Qu.: 89.00 3rd Qu.:81.00 3rd Qu.:89.00
## Max. :99.0 Max. :100.00 Max. :95.00 Max. :99.00
## H_COMP_7 H_HSP_RATING H_QUIET H_RECMND
## Min. :64.00 Min. :65.00 Min. :60.00 Min. : 59.00
## 1st Qu.:80.00 1st Qu.:87.00 1st Qu.:80.00 1st Qu.: 85.00
## Median :82.00 Median :89.00 Median :83.00 Median : 89.00
## Mean :81.54 Mean :88.74 Mean :83.05 Mean : 88.05
## 3rd Qu.:83.00 3rd Qu.:91.00 3rd Qu.:86.00 3rd Qu.: 91.00
## Max. :99.00 Max. :99.00 Max. :98.00 Max. :100.00
# Pairwise panels for every patient-experience measure; the first column
# (Provider.ID) is left out.
pairs.panels(
  patient_exp_group[-1],
  cex.labels = 0.6,
  cex.cor = 0.7,
  main = "Relationship between measures"
)
HCAHPS 3 Item Care Transition Measure (CTM-3) - - H_COMP_7
Merging the patient experience group dataframe with the master dataframe
# Full outer join of the safety-of-care and patient-experience groups on the
# columns they have in common.
master_df <- safety_of_care_group %>%
  merge(
    patient_exp_group,
    by = intersect(names(safety_of_care_group), names(patient_exp_group)),
    all = TRUE
  )
head(master_df)
# Load the Timely and Effective Care file; strings become factors and
# "Not Available" / empty cells are read as NA.
time_and_eff_care_df <- read.csv(
  file = "ValidFiles/Timely and Effective Care - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  stringsAsFactors = TRUE,
  na.strings = c("Not Available", "")
)
str(time_and_eff_care_df)
## 'data.frame': 207174 obs. of 16 variables:
## $ Provider.ID : int 10001 10001 10001 10001 10001 10001 10001 10001 10001 10001 ...
## $ Hospital.Name : Factor w/ 4617 levels "ABBEVILLE AREA MEDICAL CENTER",..: 3685 3685 3685 3685 3685 3685 3685 3685 3685 3685 ...
## $ Address : Factor w/ 4789 levels " CALLE CONCEPCION VERA AYALA #550 S",..: 468 468 468 468 468 468 468 468 468 468 ...
## $ City : Factor w/ 2949 levels "ABBEVILLE","ABERDEEN",..: 699 699 699 699 699 699 699 699 699 699 ...
## $ State : Factor w/ 56 levels "AK","AL","AR",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ ZIP.Code : int 36301 36301 36301 36301 36301 36301 36301 36301 36301 36301 ...
## $ County.Name : Factor w/ 1565 levels "ABBEVILLE","ACADIA",..: 668 668 668 668 668 668 668 668 668 668 ...
## $ Phone.Number : num 3.35e+09 3.35e+09 3.35e+09 3.35e+09 3.35e+09 ...
## $ Condition : Factor w/ 11 levels "Blood Clot Prevention and Treatment",..: 5 5 2 4 4 4 6 9 9 5 ...
## $ Measure.ID : Factor w/ 43 levels "AMI_7a","AMI_8a",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Measure.Name : Factor w/ 43 levels "Anticoagulation overlap therapy",..: 16 32 19 9 10 11 14 22 18 26 ...
## $ Score : Factor w/ 582 levels "0","1","10","100",..: NA 568 NA 193 519 579 4 575 549 NA ...
## $ Sample : int NA 24 NA 535 535 NA 337 537 3791 NA ...
## $ Footnote : Factor w/ 13 levels "1 - The number of cases/patients is too few to report.",..: 13 10 12 6 6 NA 10 6 NA 11 ...
## $ Measure.Start.Date: Factor w/ 5 levels "01/01/2014","01/01/2015",..: 2 2 2 2 2 1 2 4 5 2 ...
## $ Measure.End.Date : Factor w/ 5 levels "03/31/2015","03/31/2016",..: 5 3 5 5 5 4 3 1 2 5 ...
# Positions of the demographic columns in the timely/effective care table.
zdemogrphic_vars <- which(names(time_and_eff_care_df) %in% zdemographics)

# Drop them by computing the columns to keep: `df[, -which(...)]` would select
# zero columns if none of the demographic names were present.
time_and_eff_care_cleaned <- time_and_eff_care_df[
  ,
  setdiff(seq_along(time_and_eff_care_df), zdemogrphic_vars),
  drop = FALSE
]
head(time_and_eff_care_cleaned)
summary(time_and_eff_care_cleaned)
## Provider.ID Condition
## Min. : 10001 Emergency Department :38544
## 1st Qu.:140185 Stroke Care :38544
## Median :260037 Heart Attack or Chest Pain :33726
## Mean :267984 Blood Clot Prevention and Treatment:28908
## 3rd Qu.:390211 Surgical Care Improvement Project :28908
## Max. :670112 Colonoscopy care : 9636
## (Other) :28908
## Measure.ID
## AMI_7a : 4818
## AMI_8a : 4818
## CAC_3 : 4818
## ED_1b : 4818
## ED_2b : 4818
## EDV : 4818
## (Other):178266
## Measure.Name
## Anticoagulation overlap therapy : 4818
## Anticoagulation Therapy for Atrial Fibrillation/Flutter: 4818
## Antithrombotic Therapy by End of Hospital Day 2 : 4818
## Aspirin at Arrival : 4818
## Assessed for Rehabilitation : 4818
## Discharged on Antithrombotic Therapy : 4818
## (Other) :178266
## Score Sample
## 100 : 14414 Min. : 0
## 99 : 4502 1st Qu.: 45
## 98 : 3912 Median : 118
## 97 : 2918 Mean : 2010
## 0 : 2792 3rd Qu.: 405
## (Other): 49612 Max. :506621
## NA's :129024 NA's :132305
## Footnote
## 5 - Results are not available for this reporting period. :86714
## 2 - Data submitted were based on a sample of cases/patients. :28612
## 1 - The number of cases/patients is too few to report. : 9417
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 7095
## 7 - No cases met the criteria for this measure. : 6908
## (Other) :21439
## NA's :46989
# Row count per footnote value.
time_and_eff_care_cleaned %>%
  group_by(Footnote) %>%
  tally()

# Score was read as a factor. as.integer() on a factor returns the internal
# level codes (1..number of levels), not the values printed in the file, so the
# labels are converted via as.character() first. Labels that are not numbers
# become NA; the coercion warning for those is expected and suppressed here only.
# NOTE(review): assumes every numeric score is a whole number - confirm.
time_and_eff_care_cleaned$Score <- suppressWarnings(
  as.integer(as.character(time_and_eff_care_cleaned$Score))
)

# Peek at the rows that carry no footnote.
time_and_eff_care_cleaned[is.na(time_and_eff_care_cleaned$Footnote), ] %>%
  head()

# Keep the rows with no footnote, plus the rows whose only footnote says the
# data were based on a sample of cases/patients.
time_and_eff_care_imputed <- time_and_eff_care_cleaned[
  is.na(time_and_eff_care_cleaned$Footnote) |
    (time_and_eff_care_cleaned$Footnote %in%
      c("2 - Data submitted were based on a sample of cases/patients.")),
]
str(time_and_eff_care_imputed)
## 'data.frame': 75601 obs. of 7 variables:
## $ Provider.ID : int 10001 10001 10001 10001 10001 10001 10001 10001 10001 10001 ...
## $ Condition : Factor w/ 11 levels "Blood Clot Prevention and Treatment",..: 4 4 4 9 9 4 4 4 4 3 ...
## $ Measure.ID : Factor w/ 43 levels "AMI_7a","AMI_8a",..: 4 5 6 8 9 11 13 14 15 18 ...
## $ Measure.Name: Factor w/ 43 levels "Anticoagulation overlap therapy",..: 9 10 11 22 18 29 8 27 24 13 ...
## $ Score : int 193 519 579 575 549 112 509 578 337 550 ...
## $ Sample : int 535 535 NA 537 3791 371 377 111 58662 72 ...
## $ Footnote : Factor w/ 13 levels "1 - The number of cases/patients is too few to report.",..: 6 6 NA 6 NA NA NA NA NA NA ...
# Column-wise summary of the retained timely/effective care rows.
summary(time_and_eff_care_imputed)
## Provider.ID Condition
## Min. : 10001 Emergency Department :24437
## 1st Qu.:120001 Stroke Care :13844
## Median :250123 Blood Clot Prevention and Treatment:12954
## Mean :262222 Preventive Care : 7744
## 3rd Qu.:390104 Colonoscopy care : 5358
## Max. :670112 Heart Attack or Chest Pain : 4785
## (Other) : 6479
## Measure.ID
## IMM_3_OP_27_FAC_ADHPCT: 4107
## IMM_2 : 3637
## VTE_1 : 3514
## ED_1b : 3452
## ED_2b : 3445
## OP_20 : 3303
## (Other) :54143
## Measure.Name Score
## Healthcare workers given influenza vaccination: 4107 Min. : 1
## Immunization for influenza : 3637 1st Qu.: 16
## Venous thromboembolism prophylaxis : 3514 Median :482
## ED1 : 3452 Mean :336
## ED2 : 3445 3rd Qu.:570
## Door to diagnostic eval : 3303 Max. :582
## (Other) :54143
## Sample
## Min. : 11
## 1st Qu.: 47
## Median : 123
## Mean : 2078
## 3rd Qu.: 409
## Max. :506621
## NA's :3283
## Footnote
## 2 - Data submitted were based on a sample of cases/patients. :28612
## 1 - The number of cases/patients is too few to report. : 0
## 1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients. : 0
## 1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients., 3 - Results are based on a shorter time period than required.: 0
## 1 - The number of cases/patients is too few to report., 3 - Results are based on a shorter time period than required. : 0
## (Other) : 0
## NA's :46989
# Discard the rows that have no Sample value.
time_and_eff_care_imputed <- subset(time_and_eff_care_imputed, !is.na(Sample))
summary(time_and_eff_care_imputed)
## Provider.ID Condition
## Min. : 10001 Emergency Department :21154
## 1st Qu.:120001 Stroke Care :13844
## Median :250117 Blood Clot Prevention and Treatment:12954
## Mean :262085 Preventive Care : 7744
## 3rd Qu.:390102 Colonoscopy care : 5358
## Max. :670112 Heart Attack or Chest Pain : 4785
## (Other) : 6479
## Measure.ID
## IMM_3_OP_27_FAC_ADHPCT: 4107
## IMM_2 : 3637
## VTE_1 : 3514
## ED_1b : 3452
## ED_2b : 3445
## OP_20 : 3303
## (Other) :50860
## Measure.Name Score
## Healthcare workers given influenza vaccination: 4107 Min. : 1.0
## Immunization for influenza : 3637 1st Qu.: 4.0
## Venous thromboembolism prophylaxis : 3514 Median :445.0
## ED1 : 3452 Mean :324.9
## ED2 : 3445 3rd Qu.:569.0
## Door to diagnostic eval : 3303 Max. :578.0
## (Other) :50860
## Sample
## Min. : 11
## 1st Qu.: 47
## Median : 123
## Mean : 2078
## 3rd Qu.: 409
## Max. :506621
##
## Footnote
## 2 - Data submitted were based on a sample of cases/patients. :28612
## 1 - The number of cases/patients is too few to report. : 0
## 1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients. : 0
## 1 - The number of cases/patients is too few to report., 2 - Data submitted were based on a sample of cases/patients., 3 - Results are based on a shorter time period than required.: 0
## 1 - The number of cases/patients is too few to report., 3 - Results are based on a shorter time period than required. : 0
## (Other) : 0
## NA's :43706
# Measure.ID vs Score (scores stacked per measure)
ggplot(time_and_eff_care_imputed, aes(x = Measure.ID, y = Score)) +
  geom_col() +
  labs(title = "Measure.ID Vs Scores") +
  theme(
    axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

# Measure.ID vs number of rows reporting that measure
ggplot(time_and_eff_care_imputed, aes(x = Measure.ID)) +
  geom_bar() +
  labs(title = "Measure.ID Vs Count of Providers") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
Timeliness Group: ED_1b,ED_2b,OP_18b, OP_3, OP_5, OP_20,OP_21 ED-1b Median Time from ED Arrival to ED Departure for Admitted ED Patients ED-2b Admit Decision Time to ED Departure Time for Admitted Patients OP-3 Median Time to Transfer to Another Facility for Acute Coronary Intervention OP-5 Median Time to ECG OP-18b Median Time from ED Arrival to ED Departure for Discharged ED Patients OP-20 Door to Diagnostic Evaluation by a Qualified Medical Professional OP-21 ED-Median Time to Pain Management for Long Bone Fracture
Effectiveness Group: CAC_3,IMM_2,IMM_3_OP_27_FAC_ADHPCT,OP_22,OP_23,OP_29,OP_30,OP_4,PC_01,STK_1, STK_4, STK_6,STK_8,VTE_1,VTE_2,VTE_3,VTE_5, VTE_6 CAC-3 Home Management Plan of Care (HMPC) Document Given to Patient/Caregiver IMM-2 Influenza Immunization IMM-3/OP-27 Healthcare Personnel Influenza Vaccination OP-4 Aspirin at Arrival OP-22 ED-Patient Left Without Being Seen OP-23 ED-Head CT or MRI Scan Results for Acute Ischemic Stroke or Hemorrhagic Stroke who Received Head CT or MRI Scan Interpretation Within 45 Minutes of Arrival OP-29 Endoscopy/Polyp Surveillance: Appropriate Follow-Up Interval for Normal Colonoscopy in Average Risk Patients OP-30 Endoscopy/Polyp Surveillance: Colonoscopy Interval for Patients with a History of Adenomatous Polyps – Avoidance of Inappropriate Use PC-01 Elective Delivery Prior to 39 Completed Weeks Gestation: Percentage of Babies Electively Delivered Prior to 39 Completed Weeks Gestation STK-1 Venous Thromboembolism (VTE) Prophylaxis STK-4 Thrombolytic Therapy STK-6 Discharged on Statin Medication STK-8 Stroke Education VTE-3 Venous Thromboembolism Patients with Anticoagulation Overlap Therapy VTE-1 Venous Thromboembolism Prophylaxis VTE-2 Intensive Care Unit Venous Thromboembolism Prophylaxis VTE-5 Venous Thromboembolism Warfarin Therapy Discharge Instructions VTE-6 Hospital Acquired Potentially-Preventable Venous Thromboembolism
Separating the measure variables needed for analysis
# Reshape to one row per provider and one column per measure. Until it is
# subset below, `timeliness_group` temporarily holds ALL measures.
zvar1 <- which(names(time_and_eff_care_imputed) %in% c("Provider.ID", "Measure.ID", "Score"))
timeliness_group <- time_and_eff_care_imputed[, zvar1] %>%
  spread(key = Measure.ID, value = Score)

# Column positions of the timeliness and effectiveness measures in the wide
# table; Provider.ID is kept in both as the merge key.
zvar1 <- which(names(timeliness_group) %in% c(
  "Provider.ID", "ED_1b", "ED_2b", "OP_18b", "OP_3b", "OP_5", "OP_20", "OP_21"
))
zvar2 <- which(names(timeliness_group) %in% c(
  "Provider.ID", "CAC_3", "IMM_2", "IMM_3_OP_27_FAC_ADHPCT", "OP_22", "OP_23",
  "OP_29", "OP_30", "OP_4", "PC_01", "STK_1", "STK_4", "STK_6", "STK_8",
  "VTE_1", "VTE_2", "VTE_3", "VTE_5", "VTE_6"
))

# The effectiveness subset must be taken first, while timeliness_group still
# holds every measure. drop = FALSE keeps a data frame even for one column.
effectiveness_group <- timeliness_group[, zvar2, drop = FALSE]
timeliness_group <- timeliness_group[, zvar1, drop = FALSE]

# Pairwise panels of the timeliness measures. Provider.ID is excluded by name;
# `2:length(zvar1)` counts backwards (2, 1) when only one column matched.
pairs.panels(
  x = timeliness_group[, setdiff(names(timeliness_group), "Provider.ID"), drop = FALSE],
  cex.cor = 0.5,
  cex.labels = 0.9,
  ellipses = TRUE,
  pch = 21,
  bg = rainbow(length(zvar1)),
  main = "Timeliness Group variable correlation"
)
- Most of the measure variables of the timeliness group are uncorrelated. - ED_1b and ED_2b are inversely correlated to some extent, as the time spent in the emergency room is common to both. They are inversely correlated because the doctor's visit reduces the time the patient spends in the ED before moving to an inpatient room.
# Shorten the long influenza-vaccination column name.
zvar1 <- which(names(effectiveness_group) == "IMM_3_OP_27_FAC_ADHPCT")
names(effectiveness_group)[zvar1] <- "IMM_3_OP_27"

# Correlation plot of the effectiveness measures; the first column
# (Provider.ID) is left out.
cor.plot(
  effectiveness_group[, -1],
  stars = FALSE,
  numbers = TRUE,
  colors = TRUE,
  cex = 0.5,
  show.legend = FALSE,
  xlas = 2,
  cex.axis = 0.6,
  main = "Effectiveness group measure correlation"
)
- Variables represented by the effectiveness group have a small degree of correlation.
# Merging the master dataframe with the timeliness group (full outer join on
# the shared columns)
master_df <- master_df %>%
  merge(
    timeliness_group,
    by = intersect(names(.), names(timeliness_group)),
    all = TRUE
  )

# Merging the master dataframe with the effectiveness group (same join rule)
master_df <- master_df %>%
  merge(
    effectiveness_group,
    by = intersect(names(.), names(effectiveness_group)),
    all = TRUE
  )
summary(master_df)
## Provider.ID Hospital.overall.rating COMP_HIP_KNEE PSI_4_SURG_COMP
## Min. : 10001 1 : 115 Min. :1.500 Min. : 70.79
## 1st Qu.:140134 2 : 661 1st Qu.:2.700 1st Qu.:124.44
## Median :251304 3 :1668 Median :3.000 Median :135.57
## Mean :265886 4 : 921 Mean :3.042 Mean :136.76
## 3rd Qu.:390143 5 : 111 3rd Qu.:3.400 3rd Qu.:148.13
## Max. :670112 NA's: 968 Max. :6.000 Max. :212.16
## NA's :1730 NA's :2626
## PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR
## Min. :0.440 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.790 1st Qu.:0.2070 1st Qu.:0.1270 1st Qu.:0.3998
## Median :0.875 Median :0.4640 Median :0.4205 Median :0.8325
## Mean :0.890 Mean :0.5408 Mean :0.4787 Mean :0.9642
## 3rd Qu.:0.960 3rd Qu.:0.7465 3rd Qu.:0.7008 3rd Qu.:1.4085
## Max. :2.140 Max. :5.1900 Max. :7.0140 Max. :5.2160
## NA's :1220 NA's :2077 NA's :1566 NA's :2404
## HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :71.00
## 1st Qu.:0.000 1st Qu.:0.4280 1st Qu.:0.4920 1st Qu.:85.00
## Median :0.698 Median :0.8240 Median :0.7985 Median :87.00
## Mean :0.871 Mean :0.9892 Mean :0.8170 Mean :87.31
## 3rd Qu.:1.339 3rd Qu.:1.3800 3rd Qu.:1.1140 3rd Qu.:90.00
## Max. :4.819 Max. :7.0170 Max. :3.7410 Max. :99.00
## NA's :3588 NA's :2619 NA's :1198 NA's :1003
## H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4
## Min. : 73.00 Min. : 75.00 Min. :64.0 Min. : 70.00
## 1st Qu.: 90.00 1st Qu.: 90.00 1st Qu.:83.0 1st Qu.: 86.00
## Median : 92.00 Median : 92.00 Median :85.0 Median : 88.00
## Mean : 91.34 Mean : 91.91 Mean :85.3 Mean : 87.58
## 3rd Qu.: 93.00 3rd Qu.: 93.00 3rd Qu.:88.0 3rd Qu.: 89.00
## Max. :100.00 Max. :100.00 Max. :99.0 Max. :100.00
## NA's :1003 NA's :1003 NA's :1003 NA's :1003
## H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING
## Min. :58.00 Min. :62.00 Min. :64.00 Min. :65.00
## 1st Qu.:76.00 1st Qu.:85.00 1st Qu.:80.00 1st Qu.:87.00
## Median :79.00 Median :87.00 Median :82.00 Median :89.00
## Mean :78.78 Mean :86.93 Mean :81.54 Mean :88.74
## 3rd Qu.:81.00 3rd Qu.:89.00 3rd Qu.:83.00 3rd Qu.:91.00
## Max. :95.00 Max. :99.00 Max. :99.00 Max. :99.00
## NA's :1003 NA's :1003 NA's :1003 NA's :1003
## H_QUIET H_RECMND ED_1b ED_2b
## Min. :60.00 Min. : 59.00 Min. : 5.0 Min. : 1
## 1st Qu.:80.00 1st Qu.: 85.00 1st Qu.:137.0 1st Qu.: 62
## Median :83.00 Median : 89.00 Median :186.0 Median :380
## Mean :83.05 Mean : 88.05 Mean :206.2 Mean :305
## 3rd Qu.:86.00 3rd Qu.: 91.00 3rd Qu.:255.0 3rd Qu.:526
## Max. :98.00 Max. :100.00 Max. :578.0 Max. :578
## NA's :1003 NA's :1003 NA's :992 NA's :999
## OP_18b OP_20 OP_21 OP_3b
## Min. : 4 Min. : 1.0 Min. : 4.0 Min. : 9.0
## 1st Qu.: 37 1st Qu.: 94.0 1st Qu.:338.0 1st Qu.:316.0
## Median : 64 Median :183.0 Median :441.0 Median :432.0
## Mean :116 Mean :225.1 Mean :409.8 Mean :393.4
## 3rd Qu.:101 3rd Qu.:337.0 3rd Qu.:499.0 3rd Qu.:492.0
## Max. :578 Max. :577.0 Max. :578.0 Max. :578.0
## NA's :1143 NA's :1141 NA's :1263 NA's :4056
## OP_5 CAC_3 IMM_2 IMM_3_OP_27
## Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 4
## 1st Qu.: 72.0 1st Qu.:482.5 1st Qu.:555.0 1st Qu.:534
## Median :431.0 Median :560.0 Median :572.0 Median :559
## Mean :327.1 Mean :447.2 Mean :483.1 Mean :527
## 3rd Qu.:525.0 3rd Qu.:572.0 3rd Qu.:577.0 3rd Qu.:572
## Max. :562.0 Max. :578.0 Max. :578.0 Max. :578
## NA's :2384 NA's :4269 NA's :807 NA's :337
## OP_22 OP_23 OP_29 OP_30
## Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.: 2.0 1st Qu.:473.0 1st Qu.:337.0 1st Qu.:261.0
## Median : 2.0 Median :531.0 Median :534.0 Median :539.0
## Mean :100.4 Mean :472.2 Mean :419.6 Mean :406.9
## 3rd Qu.:116.0 3rd Qu.:553.0 3rd Qu.:563.0 3rd Qu.:568.0
## Max. :562.0 Max. :577.0 Max. :578.0 Max. :578.0
## NA's :1169 NA's :3247 NA's :1713 NA's :1817
## OP_4 PC_01 STK_1 STK_4
## Min. : 4 Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.: 4 1st Qu.: 1.0 1st Qu.: 4.0 1st Qu.: 4.0
## Median :568 Median : 2.0 Median :568.0 Median :534.0
## Mean :375 Mean :111.6 Mean :355.3 Mean :332.8
## 3rd Qu.:575 3rd Qu.:227.0 3rd Qu.:577.0 3rd Qu.:568.0
## Max. :578 Max. :562.0 Max. :578.0 Max. :578.0
## NA's :2404 NA's :1966 NA's :1795 NA's :3547
## STK_6 STK_8 VTE_1 VTE_2
## Min. : 4.0 Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.: 4.0 1st Qu.: 4.0 1st Qu.:554.0 1st Qu.: 4.0
## Median :569.0 Median :563.0 Median :570.0 Median :569.0
## Mean :394.7 Mean :390.3 Mean :471.2 Mean :365.9
## 3rd Qu.:577.0 3rd Qu.:573.0 3rd Qu.:577.0 3rd Qu.:577.0
## Max. :578.0 Max. :578.0 Max. :578.0 Max. :578.0
## NA's :1884 NA's :2090 NA's :930 NA's :1532
## VTE_3 VTE_5 VTE_6
## Min. : 4 Min. : 4 Min. : 1.00
## 1st Qu.: 4 1st Qu.: 4 1st Qu.: 1.00
## Median :563 Median :534 Median : 1.00
## Mean :412 Mean :319 Mean : 95.91
## 3rd Qu.:572 3rd Qu.:569 3rd Qu.:116.00
## Max. :578 Max. :578 Max. :570.00
## NA's :1963 NA's :2216 NA's :3186
# Load the Readmissions and Deaths file; strings become factors and
# "Not Available" / empty cells are read as NA.
readm_mort_df <- read.csv(
  file = "ValidFiles/Readmissions and Deaths - Hospital.csv",
  check.names = TRUE,
  header = TRUE,
  stringsAsFactors = TRUE,
  na.strings = c("Not Available", "")
)
head(readm_mort_df)

# Positions of the demographic columns plus the two descriptive measure columns.
zdemogrphic_vars <- which(
  names(readm_mort_df) %in% c(zdemographics, "Measure.Name", "Compared.to.National")
)

# Drop them by computing the columns to keep: `df[, -which(...)]` would select
# zero columns if none of these names were present.
readm_mort_cleaned <- readm_mort_df[
  ,
  setdiff(seq_along(readm_mort_df), zdemogrphic_vars),
  drop = FALSE
]
head(readm_mort_cleaned)
summary(readm_mort_cleaned)
## Provider.ID Measure.ID Denominator Score
## Min. : 10001 MORT_30_AMI : 4818 Min. : 25.0 Min. : 1.40
## 1st Qu.:140185 MORT_30_CABG: 4818 1st Qu.: 77.0 1st Qu.:12.10
## Median :260037 MORT_30_COPD: 4818 Median : 178.0 Median :15.20
## Mean :267984 MORT_30_HF : 4818 Mean : 389.6 Mean :14.51
## 3rd Qu.:390211 MORT_30_PN : 4818 3rd Qu.: 388.0 3rd Qu.:17.60
## Max. :670112 MORT_30_STK : 4818 Max. :26526.0 Max. :31.30
## (Other) :38544 NA's :25742 NA's :25742
## Lower.Estimate Higher.Estimate
## Min. : 0.80 Min. : 2.20
## 1st Qu.: 9.30 1st Qu.:15.40
## Median :12.50 Median :18.00
## Mean :11.85 Mean :17.73
## 3rd Qu.:14.90 3rd Qu.:21.30
## Max. :27.60 Max. :35.30
## NA's :25742 NA's :25742
## Footnote
## 1 - The number of cases/patients is too few to report. :11192
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 2282
## 4 - Data suppressed by CMS for one or more quarters. : 686
## 5 - Results are not available for this reporting period. :11280
## 7 - No cases met the criteria for this measure. : 302
## NA's :41710
##
1 - The number of cases/patients is too few to report. 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs. 4 - Data suppressed by CMS for one or more quarters.
5 - Results are not available for this reporting period. 7 - No cases met the criteria for this measure.
# For the footnoted rows, report per footnote the row count and the mean score.
readm_mort_cleaned %>%
  dplyr::filter(!is.na(Footnote)) %>%
  group_by(Footnote) %>%
  summarise(cnt_rows = n(), avg_score = mean(Score))
# Retain only the rows that carry no footnote.
readm_mort_cleaned <- subset(readm_mort_cleaned, is.na(Footnote))
# Mean score per measure.
# Aggregate before plotting: geom_col() stacks one segment per input row, so
# feeding it the raw per-provider scores would draw the SUM of scores for each
# measure, and the axis must describe a score rather than a provider count.
readm_mort_cleaned %>%
  group_by(Measure.ID) %>%
  summarise(avg_score = mean(Score, na.rm = TRUE)) %>%
  ggplot(aes(Measure.ID, avg_score)) +
  geom_col() +
  labs(title = "Mortality Readmission Rate Score") +
  ylab(label = "Mean Score") +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Bar chart of how many score rows remain for each measure.
ggplot(data = readm_mort_cleaned, mapping = aes(x = Measure.ID)) +
  geom_bar() +
  ggtitle("Mortality Readmission Rate Count by Providers") +
  ylab("Provider Count") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
Readmission measures:
- READM-30-AMI: Acute Myocardial Infarction (AMI) 30-Day Readmission Rate
- READM-30-COPD: Chronic Obstructive Pulmonary Disease (COPD) 30-Day Readmission Rate
- READM-30-CABG: Coronary Artery Bypass Graft (CABG) 30-Day Readmission Rate
- READM-30-HF: Heart Failure (HF) 30-Day Readmission Rate
- READM-30-Hip-Knee: Hospital-Level 30-Day All-Cause Risk-Standardized Readmission Rate (RSRR) Following Elective Total Hip Arthroplasty (THA) / Total Knee Arthroplasty (TKA)
- READM-30-PN: Pneumonia (PN) 30-Day Readmission Rate
- READM-30-STK: Stroke (STK) 30-Day Readmission Rate
- READM-30-HOSP-WIDE (HWR): Hospital-Wide All-Cause Unplanned Readmission
# List the measure identifiers present in the factor.
levels(readm_mort_cleaned[["Measure.ID"]])
## [1] "MORT_30_AMI" "MORT_30_CABG" "MORT_30_COPD"
## [4] "MORT_30_HF" "MORT_30_PN" "MORT_30_STK"
## [7] "READM_30_AMI" "READM_30_CABG" "READM_30_COPD"
## [10] "READM_30_HF" "READM_30_HIP_KNEE" "READM_30_HOSP_WIDE"
## [13] "READM_30_PN" "READM_30_STK"
# Reshape to one row per provider with one score column per measure, using only
# the provider key, the measure identifier and the score.
zvar1 <- which(is.element(names(readm_mort_cleaned), c("Provider.ID", "Measure.ID", "Score")))
readm_mort_final <- tidyr::spread(
  data = readm_mort_cleaned[, zvar1],
  key = Measure.ID,
  value = Score
)
# Mortality group: the provider key plus every MORT_30_* column.
zvar1 <- which(
  names(readm_mort_final) == "Provider.ID" |
    startsWith(names(readm_mort_final), "MORT_30_")
)
mortality_grp <- readm_mort_final[, zvar1]
# Readmission group: everything that is not a MORT_30_* column.
zvar1 <- which(startsWith(names(readm_mort_final), "MORT_30_"))
readmission_grp <- readm_mort_final[, -zvar1]
dim(readmission_grp)
## [1] 4415 9
dim(mortality_grp)
## [1] 4415 7
# Correlation heat maps for the two measure groups (provider key excluded).
cor.plot(
  mortality_grp[, -1],
  main = "Mortality Group Correlation Plot",
  numbers = TRUE,
  stars = FALSE,
  show.legend = FALSE,
  xlas = 2,
  cex = 0.7,
  cex.axis = 0.6
)
cor.plot(
  readmission_grp[, -1],
  main = "Readmission Group Correlation Plot",
  numbers = TRUE,
  stars = FALSE,
  show.legend = FALSE,
  xlas = 2,
  cex = 0.7,
  cex.axis = 0.6
)
# Scatter-plot matrix of the readmission measures.
pairs.panels(
  readmission_grp[, -1],
  scale = TRUE,
  ellipses = TRUE,
  pch = 21,
  bg = rainbow(n = ncol(readmission_grp)),
  cex.labels = 0.7
)
Ailments are significantly correlated.
# Full outer join onto the master table. No `by` is given because merge()
# already defaults to the columns the two frames have in common.
master_df <- merge(master_df, readm_mort_final, all = TRUE)
# Load the outpatient imaging efficiency measures. Empty cells and the literal
# "Not Available" are read as NA; strings are read as factors.
op_imaging_eff_df <- read.csv(
  file = "ValidFiles/Outpatient Imaging Efficiency - Hospital.csv",
  header = TRUE,
  check.names = TRUE,
  na.strings = c("Not Available", ""),
  stringsAsFactors = TRUE
)
head(op_imaging_eff_df)
# Positions of the demographic columns.
zdemogrphic_vars <- which(names(op_imaging_eff_df) %in% zdemographics)
# Keep the complement of those positions. `df[, -which(...)]` is avoided
# because an empty match would select zero columns instead of keeping them all;
# `drop = FALSE` guarantees the result stays a data frame.
op_imaging_eff_cleaned <- op_imaging_eff_df[
  , setdiff(seq_along(op_imaging_eff_df), zdemogrphic_vars), drop = FALSE
]
head(op_imaging_eff_cleaned)
summary(op_imaging_eff_cleaned)
## Provider.ID Measure.ID
## Min. : 10001 OP_10:4818
## 1st Qu.:140185 OP_11:4818
## Median :260037 OP_13:4818
## Mean :267984 OP_14:4818
## 3rd Qu.:390211 OP_8 :4818
## Max. :670112 OP_9 :4818
##
## Measure.Name
## Abdomen CT Use of Contrast Material :4818
## Mammography Follow-up Rates :4818
## MRI Lumbar Spine for Low Back Pain :4818
## Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery:4818
## Outpatients with brain CT scans who got a sinus CT scan at the same time :4818
## Thorax CT Use of Contrast Material :4818
##
## Score
## Min. : 0.000
## 1st Qu.: 2.200
## Median : 5.000
## Mean : 9.223
## 3rd Qu.: 9.400
## Max. :82.600
## NA's :12595
## Footnote
## 1 - The number of cases/patients is too few to report. : 5659
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 978
## 4 - Data suppressed by CMS for one or more quarters. : 295
## 5 - Results are not available for this reporting period. : 3540
## 7 - No cases met the criteria for this measure. : 2123
## NA's :16313
##
# Summary of the rows that carry no footnote.
summary(op_imaging_eff_cleaned[is.na(op_imaging_eff_cleaned$Footnote), ])
## Provider.ID Measure.ID
## Min. : 10001 OP_10:3629
## 1st Qu.:140160 OP_11:3349
## Median :260015 OP_13:2233
## Mean :265982 OP_14:2304
## 3rd Qu.:390070 OP_8 :1524
## Max. :670098 OP_9 :3274
## Measure.Name
## Abdomen CT Use of Contrast Material :3629
## Mammography Follow-up Rates :3274
## MRI Lumbar Spine for Low Back Pain :1524
## Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery:2233
## Outpatients with brain CT scans who got a sinus CT scan at the same time :2304
## Thorax CT Use of Contrast Material :3349
## Score
## Min. : 0.000
## 1st Qu.: 2.200
## Median : 5.000
## Mean : 9.223
## 3rd Qu.: 9.400
## Max. :82.600
## Footnote
## 1 - The number of cases/patients is too few to report. : 0
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 0
## 4 - Data suppressed by CMS for one or more quarters. : 0
## 5 - Results are not available for this reporting period. : 0
## 7 - No cases met the criteria for this measure. : 0
## NA's :16313
# Summary of the rows that have a score.
summary(subset(op_imaging_eff_cleaned, !is.na(Score)))
## Provider.ID Measure.ID
## Min. : 10001 OP_10:3629
## 1st Qu.:140160 OP_11:3349
## Median :260015 OP_13:2233
## Mean :265982 OP_14:2304
## 3rd Qu.:390070 OP_8 :1524
## Max. :670098 OP_9 :3274
## Measure.Name
## Abdomen CT Use of Contrast Material :3629
## Mammography Follow-up Rates :3274
## MRI Lumbar Spine for Low Back Pain :1524
## Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery:2233
## Outpatients with brain CT scans who got a sinus CT scan at the same time :2304
## Thorax CT Use of Contrast Material :3349
## Score
## Min. : 0.000
## 1st Qu.: 2.200
## Median : 5.000
## Mean : 9.223
## 3rd Qu.: 9.400
## Max. :82.600
## Footnote
## 1 - The number of cases/patients is too few to report. : 0
## 19 - Data are shown only for hospitals that participate in the Inpatient Quality Reporting (IQR) and Outpatient Quality Reporting (OQR) programs.: 0
## 4 - Data suppressed by CMS for one or more quarters. : 0
## 5 - Results are not available for this reporting period. : 0
## 7 - No cases met the criteria for this measure. : 0
## NA's :16313
# Retain only the rows that carry no footnote.
op_imaging_eff_cleaned <- op_imaging_eff_cleaned[is.na(op_imaging_eff_cleaned$Footnote), ]
# Number of providers reporting each imaging efficiency measure.
ggplot(op_imaging_eff_cleaned, aes(Measure.ID)) +
  geom_bar() +
  labs(title = "Count of Providers Vs Imaging Efficiency measures") +
  theme(plot.title = element_text(hjust = 0.5))
# Mean score per imaging efficiency measure.
# The mean is computed per group before plotting: writing mean(Score) inside
# aes() evaluates a single mean over the whole data set, and geom_col() then
# stacks that constant once per row, giving bars of count x global mean.
op_imaging_eff_cleaned %>%
  group_by(Measure.ID) %>%
  summarise(avg_score = mean(Score, na.rm = TRUE)) %>%
  ggplot(aes(Measure.ID, avg_score)) +
  geom_col() +
  labs(title = "Mean Score Vs Imaging Efficiency measures") +
  ylab(label = "Mean Score") +
  theme(plot.title = element_text(hjust = 0.5))
Outpatient Imaging Efficiency Group:
- OP-8: MRI Lumbar Spine for Low Back Pain
- OP-10: Abdomen CT Use of Contrast Material
- OP-11: Thorax CT Use of Contrast Material
- OP-13: Cardiac Imaging for Preoperative Risk Assessment for Non-Cardiac Low-Risk Surgery
- OP-14: Simultaneous Use of Brain Computed Tomography (CT) and Sinus CT
# Reshape to one row per provider with one score column per imaging measure.
zvar1 <- c("Provider.ID", "Measure.ID", "Score")
zvar2 <- which(names(op_imaging_eff_cleaned) %in% zvar1)
op_imaging_eff_final <- op_imaging_eff_cleaned[, zvar2] %>%
  spread(key = Measure.ID, value = Score)
# Drop the less significant OP_9 measure. It is removed by name rather than by
# position so that a change in column order (or OP_9 being absent) can never
# cause a different measure to be dropped.
op_imaging_eff_final <- op_imaging_eff_final[
  , setdiff(names(op_imaging_eff_final), "OP_9"), drop = FALSE
]
head(op_imaging_eff_final)
# Scatter-plot matrix and correlation heat map of the measures (key excluded).
pairs.panels(
  op_imaging_eff_final[, -1],
  scale = TRUE,
  smooth = TRUE,
  density = TRUE,
  ellipses = TRUE,
  pch = 21,
  method = "pearson",
  cex.labels = 1,
  cex.cor = 1
)
cor.plot(
  op_imaging_eff_final[, -1],
  show.legend = FALSE,
  xlas = 2,
  keep.par = TRUE,
  numbers = TRUE,
  colors = TRUE
)
* Data variability is uniform across the dataset, except for the OP_10 and OP_11 measures, which are closely correlated and have skewed distributions.
# Full outer join onto the master table. No `by` is given because merge()
# already defaults to the columns the two frames have in common.
master_df <- merge(master_df, op_imaging_eff_final, all = TRUE) #%>% head()
Next steps: 1. Check for outliers in the measures. 2. Impute missing values.
# Recode the overall rating as a factor in which NA becomes its own
# "Missing" level.
ratings <- as.character(master_df$Hospital.overall.rating)
ratings[is.na(ratings)] <- "Missing"
master_df$Hospital.overall.rating <- factor(ratings)
# Flag rows in which every column after the first two is NA.
na_indices <- rowSums(!is.na(master_df[, -c(1, 2)])) == 0
sum(na_indices)
## [1] 0
# Percentage share of each rating level.
paste0(
  round(100 * prop.table(table(master_df$Hospital.overall.rating)), 2),
  "%"
)
## [1] "2.5%" "14.38%" "36.28%" "20.03%" "2.41%" "24.39%"
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following object is masked from 'package:tidyr':
##
## complete
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Multiple imputation over every column except the first two:
# 5 imputed data sets, 5 iterations each, fixed seed.
master_df_imputed <- mice(
  data = master_df[, -c(1, 2)],
  m = 5,
  maxit = 5,
  seed = 100
)
##
## iter imp variable
## 1 1 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 1 2 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 1 3 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 1 4 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 1 5 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 2 1 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 2 2 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 2 3 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 2 4 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 2 5 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 3 1 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 3 2 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 3 3 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 3 4 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 3 5 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 4 1 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 4 2 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 4 3 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 4 4 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 4 5 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 5 1 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 5 2 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 5 3 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 5 4 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
## 5 5 COMP_HIP_KNEE PSI_4_SURG_COMP PSI_90_SAFETY HAI_1_SIR HAI_2_SIR HAI_3_SIR HAI_4_SIR HAI_5_SIR HAI_6_SIR H_CLEAN H_COMP_1 H_COMP_2 H_COMP_3 H_COMP_4 H_COMP_5 H_COMP_6 H_COMP_7 H_HSP_RATING H_QUIET H_RECMND ED_1b ED_2b OP_18b OP_20 OP_21 OP_3b OP_5 CAC_3 IMM_2 IMM_3_OP_27 OP_22 OP_23 OP_29 OP_30 OP_4 PC_01 STK_1 STK_4 STK_6 STK_8 VTE_1 VTE_2 VTE_3 VTE_5 VTE_6 MORT_30_AMI MORT_30_CABG MORT_30_COPD MORT_30_HF MORT_30_PN MORT_30_STK READM_30_AMI READM_30_CABG READM_30_COPD READM_30_HF READM_30_HIP_KNEE READM_30_HOSP_WIDE READM_30_PN READM_30_STK OP_10 OP_11 OP_13 OP_14 OP_8
# Extract the fifth completed data set from the imputation object.
# `complete` is namespace-qualified because tidyr exports a function of the
# same name; which one is found otherwise depends on package attach order.
master_df_imputed_final <- mice::complete(data = master_df_imputed, action = 5)
head(master_df_imputed_final)
set.seed(100)
# Re-attach the first two columns, which were left out of the imputation.
master_df_final <- cbind(master_df[, c(1, 2)], master_df_imputed_final)
#master_df_final <- master_df
# Sampling: 70% of the rows go to training. seq_len() stays correct for an
# empty frame (1:0 would yield c(1, 0)), and floor() makes explicit the
# truncation that sample() applies to a fractional size.
n_rows <- nrow(master_df_final)
indices <- sample(seq_len(n_rows), floor(0.7 * n_rows))
# training dataset
train_df <- master_df_final[indices, ]
# test dataset
test_df <- master_df_final[-indices, ]
set.seed(100)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
##
## outlier
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(doParallel)
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
## Loading required package: parallel
# Register a doParallel backend for foreach, passing 4 as both `cl` and `cores`.
# NOTE(review): presumably meant to request 4 workers; how a numeric `cl`
# combined with `cores` is interpreted is not visible here — confirm against
# the doParallel documentation.
doParallel::registerDoParallel(cl = 4,cores = 4)
# caret resampling specification: 5-fold cross-validation repeated 5 times,
# grid search, SMOTE sampling, parallel execution allowed.
train_control_rf <- trainControl(
  method = "repeatedcv",
  repeats = 5,
  number = 5,
  search = "grid",
  sampling = "smote",
  allowParallel = TRUE
)
# Tuning grid parameters of Random Forest
tuning_grid_rf <- expand.grid(
  .mtry = round(sqrt(ncol(train_df[, -1]))),
  ntree = seq(100, 1000, 100)
)
# Train the random forest on the training data (first column excluded).
# randomForest() has no `trControl`, `tuneGrid`, `metric` or `seed` arguments;
# it accepts and silently discards them, so passing them only gives the false
# impression that cross-validation, SMOTE and grid tuning are applied. They are
# therefore not passed, and the forest is fitted with the package defaults for
# `ntree` and `mtry`, exactly as before.
# NOTE(review): if tuning with train_control_rf / tuning_grid_rf is actually
# wanted, the fit presumably has to go through caret::train() instead — confirm
# the grid layout and metric caret expects before switching.
model_rf <- randomForest(
  Hospital.overall.rating ~ .,
  data = train_df[, -1],
  na.action = na.roughfix
)
model_rf
##
## Call:
## randomForest(formula = Hospital.overall.rating ~ ., data = train_df[, -1], trControl = train_control_rf, tuneGrid = tuning_grid_rf, metric = "auc", seed = 100, na.action = na.roughfix)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 8
##
## OOB estimate of error rate: 42.71%
## Confusion matrix:
## 1 2 3 4 5 Missing class.error
## 1 18 58 0 0 0 0 0.7631579
## 2 1 266 188 2 0 3 0.4217391
## 3 0 37 1022 70 0 53 0.1353638
## 4 0 0 209 435 1 20 0.3458647
## 5 0 0 1 45 19 4 0.7246377
## Missing 1 54 496 129 2 83 0.8915033
# Predictor columns: every training column after the first, minus the target.
vars_predict <- setdiff(names(train_df)[-1], "Hospital.overall.rating")
# Predict on the held-out rows using only those predictor columns.
predict_rf <- stats::predict(model_rf, test_df[, vars_predict])
# Compare the predictions with the actual ratings.
confusionMatrix(data = predict_rf, reference = test_df$Hospital.overall.rating)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 Missing
## 1 8 0 0 0 0 0
## 2 31 124 22 1 0 25
## 3 0 75 428 67 0 259
## 4 0 1 18 179 26 53
## 5 0 0 0 1 15 0
## Missing 0 1 18 8 1 19
##
## Overall Statistics
##
## Accuracy : 0.5601
## 95% CI : (0.5335, 0.5865)
## No Information Rate : 0.3522
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3895
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity 0.205128 0.61692 0.8807 0.6992 0.35714
## Specificity 1.000000 0.93299 0.5515 0.9128 0.99925
## Pos Pred Value 1.000000 0.61084 0.5163 0.6462 0.93750
## Neg Pred Value 0.977405 0.93458 0.8947 0.9302 0.98021
## Prevalence 0.028261 0.14565 0.3522 0.1855 0.03043
## Detection Rate 0.005797 0.08986 0.3101 0.1297 0.01087
## Detection Prevalence 0.005797 0.14710 0.6007 0.2007 0.01159
## Balanced Accuracy 0.602564 0.77495 0.7161 0.8060 0.67820
## Class: Missing
## Sensitivity 0.05337
## Specificity 0.97266
## Pos Pred Value 0.40426
## Neg Pred Value 0.74719
## Prevalence 0.25797
## Detection Rate 0.01377
## Detection Prevalence 0.03406
## Balanced Accuracy 0.51301